{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "def3ec8b",
   "metadata": {},
   "source": [
    "### P034 使用Numpy实现线性回归"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "2eae14b4",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "fbe5ae86",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>years</th>\n",
       "      <th>salary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>4000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>4250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>4500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>4750</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>5000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>5250</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   years  salary\n",
       "0      1    4000\n",
       "1      2    4250\n",
       "2      3    4500\n",
       "3      4    4750\n",
       "4      5    5000\n",
       "5      6    5250"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({'years': [1, 2, 3, 4, 5, 6],\n",
    "                   'salary': [4000, 4250, 4500, 4750, 5000, 5250]})\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "0e5dcb8b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "6"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "m = len(df)\n",
    "m"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a92c17e8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1, 2, 3, 4, 5, 6], dtype=int64)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X1 = df[\"years\"].values\n",
    "X1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "d4e0fa33",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1., 1.],\n",
       "       [1., 2.],\n",
       "       [1., 3.],\n",
       "       [1., 4.],\n",
       "       [1., 5.],\n",
       "       [1., 6.]])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = np.append(np.ones((m, 1)), X1.reshape(m, 1), axis=1)\n",
    "X"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "8d94eb43",
   "metadata": {},
   "outputs": [],
   "source": [
    "y = df[\"salary\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "41aef3fe",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 27750., 101500.])"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.dot(X.T, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "f99446c0",
   "metadata": {},
   "outputs": [],
   "source": [
    "coefs = np.dot(np.linalg.inv(np.dot(X.T, X)), np.dot(X.T, y))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "6b6f2c3e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([3750.,  250.])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "coefs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "f1c2ff66",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'y = 3749.9999999999964 + 250.0x'"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "f\"y = {coefs[0]} + {coefs[1]}x\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fb285893",
   "metadata": {},
   "source": [
    "### P035 使用Sklearn实现线性回归"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "d783a200",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LinearRegression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "f8924670",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>years</th>\n",
       "      <th>salary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>4000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>4250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>4500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>4750</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>5000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>5250</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   years  salary\n",
       "0      1    4000\n",
       "1      2    4250\n",
       "2      3    4500\n",
       "3      4    4750\n",
       "4      5    5000\n",
       "5      6    5250"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "fa643b27",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = LinearRegression()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "6f88a631",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LinearRegression()"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.fit(\n",
    "    df[[\"years\"]],\n",
    "    df[[\"salary\"]]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "c9687e18",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([3750.])"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.intercept_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "bf4a5567",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[250.]])"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.coef_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "fbd45e9d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'y = 3750.0 + 250.0x'"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "f\"y = {model.intercept_[0]} + {model.coef_[0][0]}x\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "79d0107d",
   "metadata": {},
   "source": [
    "### P036 读取csv实现线性回归"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "66545376",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.linear_model import LinearRegression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "7dd07317",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(\"./p036.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "5c0aae3c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>variable</th>\n",
       "      <th>target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-1.136602</td>\n",
       "      <td>-13.152922</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-1.418556</td>\n",
       "      <td>-36.316645</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.744814</td>\n",
       "      <td>104.184840</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   variable      target\n",
       "0 -1.136602  -13.152922\n",
       "1 -1.418556  -36.316645\n",
       "2  1.744814  104.184840"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "60ed9891",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = LinearRegression()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "ed0fb4f8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LinearRegression()"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.fit(\n",
    "    df[[\"variable\"]],\n",
    "    df[\"target\"]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "e94a2e9d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.5531873717999665"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.score(\n",
    "    df[[\"variable\"]],\n",
    "    df[\"target\"]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b227b500",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
